{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
   "metadata": {},
   "source": [
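    "# Simple Index Demo\n",
    "\n",
    "This notebook builds a `GPTSimpleVectorIndex` over the Paul Graham essay data and queries it with a `StepDecomposeQueryTransform`, which rewrites the original question into a sequence of simpler sub-questions. LLM predictors are configured for both GPT-4 and GPT-3 (`text-davinci-003`)."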
   ]
  },
  {
   "cell_type": "markdown",
   "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
   "metadata": {},
   "source": [
    "#### Load documents, build the GPTSimpleVectorIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "# Route INFO-level logs to the notebook output through a single root handler.\n",
    "# force=True replaces any handlers that are already installed on the root logger,\n",
    "# so no second StreamHandler is needed (adding one prints every record twice).\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)\n",
    "\n",
    "from gpt_index import (\n",
    "    GPTSimpleVectorIndex,\n",
    "    SimpleDirectoryReader,\n",
    "    LLMPredictor,\n",
    "    ServiceContext,\n",
    ")\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.llms import OpenAI\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c48da73f-aadb-480c-8db1-99c915b7cc1c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# GPT-3 (text-davinci-003): predictor plus the service context that carries it\n",
    "llm_predictor_gpt3 = LLMPredictor(\n",
    "    llm=OpenAI(model_name=\"text-davinci-003\", temperature=0)\n",
    ")\n",
    "service_context_gpt3 = ServiceContext.from_defaults(\n",
    "    llm_predictor=llm_predictor_gpt3\n",
    ")\n",
    "\n",
    "# GPT-4: same wiring, using the chat-model wrapper\n",
    "llm_predictor_gpt4 = LLMPredictor(\n",
    "    llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0)\n",
    ")\n",
    "service_context_gpt4 = ServiceContext.from_defaults(\n",
    "    llm_predictor=llm_predictor_gpt4\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "03d1691e-544b-454f-825b-5ee12f7faa8a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# load documents\n",
    "documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:gpt_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17617 tokens\n",
      "> [build_index_from_nodes] Total embedding token usage: 17617 tokens\n"
     ]
    }
   ],
   "source": [
    "index = GPTSimpleVectorIndex.from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2bbccf1d-ac39-427c-b3a3-f8e9d1d12348",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# save index to disk\n",
    "index.save_to_disk('index_simple.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "197ca78e-1310-474d-91e3-877c3636b901",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# load index from disk\n",
    "index = GPTSimpleVectorIndex.load_from_disk('index_simple.json')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
   "metadata": {},
   "source": [
    "#### Query Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "95d989ba-0c1d-43b6-a1d3-0ea7135f43a6",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from gpt_index.indices.query.query_transform.base import (\n",
    "    StepDecomposeQueryTransform,\n",
    ")\n",
    "\n",
    "# One transform per model, each built on the matching predictor defined above.\n",
    "# verbose=True appears to be what produces the step-by-step query trace in the outputs.\n",
    "step_decompose_transform = StepDecomposeQueryTransform(llm_predictor_gpt4, verbose=True)  # gpt-4\n",
    "step_decompose_transform_gpt3 = StepDecomposeQueryTransform(llm_predictor_gpt3, verbose=True)  # gpt-3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2a124db0-e2d7-4566-bcec-1d41cf669ff4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "index.index_struct.summary = \"Used to answer questions about the author\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "85466fdf-93f3-4cb1-a5f9-0056a8245a6f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33;1m\u001b[1;3m> Current query: Who was in the first batch of the accelerator program the author started?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: Who was in the first batch of the accelerator program the author started?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: Who was in the first batch of the accelerator program the author started?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: None\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query: What accelerator program did the author start?\n",
      "\u001b[0mINFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 3444 tokens\n",
      "> [query] Total LLM token usage: 3444 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 14 tokens\n",
      "> [query] Total embedding token usage: 14 tokens\n",
      "\u001b[33;1m\u001b[1;3m> Current query: Who was in the first batch of the accelerator program the author started?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: Who was in the first batch of the accelerator program the author started?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: Who was in the first batch of the accelerator program the author started?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: \n",
      "- What accelerator program did the author start?\n",
      "- The author started the accelerator program Y Combinator.\n",
      "\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query: Who was in the first batch of Y Combinator?\n",
      "\u001b[0mINFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 3474 tokens\n",
      "> [query] Total LLM token usage: 3474 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 14 tokens\n",
      "> [query] Total embedding token usage: 14 tokens\n",
      "\u001b[33;1m\u001b[1;3m> Current query: Who was in the first batch of the accelerator program the author started?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: Who was in the first batch of the accelerator program the author started?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: Who was in the first batch of the accelerator program the author started?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: \n",
      "- What accelerator program did the author start?\n",
      "- The author started the accelerator program Y Combinator.\n",
      "- Who was in the first batch of Y Combinator?\n",
      "- The first batch of Y Combinator included the founders of Reddit, Justin Kan and Emmett Shear (who later founded Twitch), Aaron Swartz, and Sam Altman.\n",
      "\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query: Who were the participants in the first batch of Y Combinator?\n",
      "\u001b[0mINFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 3505 tokens\n",
      "> [query] Total LLM token usage: 3505 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 14 tokens\n",
      "> [query] Total embedding token usage: 14 tokens\n"
     ]
    }
   ],
   "source": [
    "# Ask a multi-hop question with GPT-4, passing the step-decompose transform to the query.\n",
    "# (Set the logging level to DEBUG for more detailed outputs.)\n",
    "response_gpt4 = index.query(\n",
    "    \"Who was in the first batch of the accelerator program the author started?\",\n",
    "    service_context=service_context_gpt4,\n",
    "    query_transform=step_decompose_transform,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "bdda1b2c-ae46-47cf-91d7-3153e8d0473b",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>\n",
       "\n",
       "The participants in the first batch of Y Combinator included the founders of Reddit (Steve Huffman and Alexis Ohanian), Loopt (Sam Altman, Nick Sivo, and Daniel Gross), Weebly (David Rusenko, Chris Fanini, and Dan Veltri), Justin Kan and Emmett Shear (who later founded Twitch), Aaron Swartz (who had helped write the RSS spec and later became a martyr for open access), and Sam Altman (who later became the second president of YC).</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response_gpt4}</b>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "1c9670bd-729d-478b-a77c-c6e13c282456",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('What accelerator program did the author start?', 'The author started the accelerator program Y Combinator.'), ('Who was in the first batch of Y Combinator?', 'The first batch of Y Combinator included the founders of Reddit, Justin Kan and Emmett Shear (who later founded Twitch), Aaron Swartz, and Sam Altman.'), ('Who were the participants in the first batch of Y Combinator?', 'The participants in the first batch of Y Combinator included the founders of Reddit, Justin Kan and Emmett Shear (who later founded Twitch), Aaron Swartz (who had helped write the RSS spec and later became a martyr for open access), and Sam Altman (who later became the second president of YC).')]\n"
     ]
    }
   ],
   "source": [
    "# Pair each generated sub-question with the response text of its answer.\n",
    "sub_qa = response_gpt4.extra_info[\"sub_qa\"]\n",
    "tuples = [(entry[0], entry[1].response) for entry in sub_qa]\n",
    "print(tuples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "ec88df57",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33;1m\u001b[1;3m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: In which city did the author found his first company, Viaweb?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: In which city did the author found his first company, Viaweb?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: None\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query: Who is the author that founded Viaweb?\n",
      "\u001b[0mINFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 3485 tokens\n",
      "> [query] Total LLM token usage: 3485 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 14 tokens\n",
      "> [query] Total embedding token usage: 14 tokens\n",
      "\u001b[33;1m\u001b[1;3m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: In which city did the author found his first company, Viaweb?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: In which city did the author found his first company, Viaweb?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: \n",
      "- Who is the author that founded Viaweb?\n",
      "- The author who founded Viaweb is Paul Graham.\n",
      "\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query: In which city did Paul Graham found his first company, Viaweb?\n",
      "\u001b[0mINFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 3493 tokens\n",
      "> [query] Total LLM token usage: 3493 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 14 tokens\n",
      "> [query] Total embedding token usage: 14 tokens\n",
      "\u001b[33;1m\u001b[1;3m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: In which city did the author found his first company, Viaweb?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: In which city did the author found his first company, Viaweb?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: \n",
      "- Who is the author that founded Viaweb?\n",
      "- The author who founded Viaweb is Paul Graham.\n",
      "- In which city did Paul Graham found his first company, Viaweb?\n",
      "- Paul Graham founded his first company, Viaweb, in Cambridge.\n",
      "\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query: None\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "# Second GPT-4 example: same transform and service context, different question.\n",
    "response_gpt4 = index.query(\n",
    "    \"In which city did the author found his first company, Viaweb?\",\n",
    "    service_context=service_context_gpt4,\n",
    "    query_transform=step_decompose_transform,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "653508f1-b2b0-479a-85b3-113cda507231",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "Answer: Paul Graham founded his first company, Viaweb, in Cambridge, Massachusetts.\n"
     ]
    }
   ],
   "source": [
    "print(response_gpt4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "9fa93cdb-7007-4664-853a-5c81c6c17560",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33;1m\u001b[1;3m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: In which city did the author found his first company, Viaweb?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: In which city did the author found his first company, Viaweb?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: None\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query:  Where was the author located when he founded his first company, Viaweb?\n",
      "\u001b[0mINFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 3839 tokens\n",
      "> [query] Total LLM token usage: 3839 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 14 tokens\n",
      "> [query] Total embedding token usage: 14 tokens\n",
      "\u001b[33;1m\u001b[1;3m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: In which city did the author found his first company, Viaweb?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: In which city did the author found his first company, Viaweb?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: \n",
      "-  Where was the author located when he founded his first company, Viaweb?\n",
      "- \n",
      "\n",
      "The author was located in New York when he founded his first company, Viaweb, in 1993. He had been intimately involved with building the infrastructure of the web for most of that time, and it was only 8 years later that he realized the implications of the web for writing and publishing.\n",
      "\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query:  In which year did the author found his first company, Viaweb?\n",
      "\u001b[0mINFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 3784 tokens\n",
      "> [query] Total LLM token usage: 3784 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 14 tokens\n",
      "> [query] Total embedding token usage: 14 tokens\n",
      "\u001b[33;1m\u001b[1;3m> Current query: In which city did the author found his first company, Viaweb?\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> Formatted prompt: The original question is as follows: In which city did the author found his first company, Viaweb?\n",
      "We have an opportunity to answer some, or all of the question from a knowledge source. Context information for the knowledge source is provided below, as well as previous reasoning steps.\n",
      "Given the context and previous reasoning, return a question that can be answered from the context. This question can be the same as the original question, or this question can represent a subcomponent of the overall question.It should not be irrelevant to the original question.\n",
      "If we cannot extract more information from the context, provide 'None' as the answer. Some examples are given below: \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None\n",
      "Next question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: Who was the winner of the 2020 Australian Open?\n",
      "Knowledge source context: Provides names of the winners of the 2020 Australian Open\n",
      "Previous reasoning: None.\n",
      "New question: Who was the winner of the 2020 Australian Open? \n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: None\n",
      "\n",
      "Question: How many Grand Slam titles does the winner of the 2020 Australian Open have?\n",
      "Knowledge source context: Provides information about the winners of the 2020 Australian Open - includes biographical information for each winner\n",
      "Previous reasoning:\n",
      "- Who was the winner of the 2020 Australian Open? \n",
      "- The winner of the 2020 Australian Open was Novak Djokovic.\n",
      "New question: How many Grand Slam titles does Novak Djokovic have? \n",
      "\n",
      "Question: In which city did the author found his first company, Viaweb?\n",
      "Knowledge source context: Used to answer questions about the author\n",
      "Previous reasoning: \n",
      "-  Where was the author located when he founded his first company, Viaweb?\n",
      "- \n",
      "\n",
      "The author was located in New York when he founded his first company, Viaweb, in 1993. He had been intimately involved with building the infrastructure of the web for most of that time, and it was only 8 years later that he realized the implications of the web for writing and publishing.\n",
      "-  In which year did the author found his first company, Viaweb?\n",
      "- \n",
      "\n",
      "The author founded his first company, Viaweb, in 1995.\n",
      "\n",
      "New question: \n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3m> New query:  In which city did the author found his first company, Viaweb?\n",
      "\u001b[0mINFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 3792 tokens\n",
      "> [query] Total LLM token usage: 3792 tokens\n",
      "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 14 tokens\n",
      "> [query] Total embedding token usage: 14 tokens\n"
     ]
    }
   ],
   "source": [
    "# Query the index with the GPT-3 service context, passing step_decompose_transform_gpt3 as the query transform.\n",
    "# This cell's logged output shows the question being rewritten into sub-questions ('> New query' lines).\n",
    "response_gpt3 = index.query(\n",
    "    \"In which city did the author found his first company, Viaweb?\",\n",
    "    query_transform=step_decompose_transform_gpt3,\n",
    "    service_context=service_context_gpt3\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "05899fcf-7a04-4d21-9e6d-04983755d175",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "The author founded his first company, Viaweb, in Cambridge, Massachusetts in 1993.\n"
     ]
    }
   ],
   "source": [
    "# Print the text of the response returned by the query in the previous cell.\n",
    "print(response_gpt3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama_index",
   "language": "python",
   "name": "llama_index"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
